library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# library(RColorBrewer)
# library(ggpubr)
# library(egg)
## eval 代码要不要运行
## echo 代码要不要输出
## include 图要不要输出
## warning 警告要不要输出
## message 默认Bin等信息要不要输出dftotal <- read_tsv("data/005_SNPAnnoDB/geneSNPAnno.txt.gz") %>%
mutate(Sub=str_sub(Transcript, 9, 9))## Rows: 2672838 Columns: 41
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (21): ID, Ref, Alt, Major, Minor, Transcript, Ancestral, Region, Variant...
## dbl (20): Chr, Pos, Maf, DAF, Gerp, Alt_SIFT, Ref_SIFT, Derived_SIFT, Gerp_1...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# filter(!is.na(Ancestral),Ancestral==Ref|Ancestral==Alt) %>%
# mutate(IfRefisAnc = if_else(Ref == Ancestral,"Anc","Der")) %>%
# mutate(Sub=str_sub(Transcript, 9, 9)) %>% triadsGeneSet <- read_tsv("data/021_homoeologsGene111/triadGenes1.1_V3.txt") %>%
filter(synteny=="syntenic") %>%
select(TriadID,A,B,D) %>%
pivot_longer(cols = c(A,B,D),names_to = "Sub",values_to = "Gene") %>%
pull(Gene)## Rows: 18474 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): TriadID, A, B, D, synteny
## dbl (6): cdsLenA, cdsLenB, cdsLenD, isAHC, isBHC, isDHC
## lgl (1): Expressed
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## dftotal_triads 数据库
dftotal_triads <- dftotal %>%
mutate(Gene=str_split_fixed(Transcript,"\\.",2)[,1]) %>%
filter(Gene %in% triadsGeneSet) %>%
select(-Sub,-Gene)
# write_tsv(dftotal_triads,"/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/004_annoDB/006_geneSNPAnnotation_merge/000_triads/001_geneSNPAnno_triads.txt.gz")### DeleteriousXPCLRS1000 java类“031_GERP16way_1.5andDerived_PolyPhen2_0.5” “032_GERP16way_1.5”
[33] “033_Derived_PolyPhen2_0.5”
>
### 跑一次,将数据存储起来,不用再跑。
### wide table -> long table 将 DelCountGroup (total, homo, heter)变成长表,后续可以filter;
### long table -> wide table 将 VariantsGroup 变成宽表,为标准化求ratio做准备;
### wide table -> long table 将 Stand_LoadGroup 变成长表, 为画图做准备。
fun_getDelRatio <- function(filenamefun){
dfdelRatio <- read_tsv(filenamefun,show_col_types = F) %>%
### wide -> long
pivot_longer(.,cols =c("TotalDerivedSNPCount","HomoDerivedSNPCount","HeterDerivedSNPCount","SiteCountWithMinDepth"),names_to ="DelCountGroup" ,values_to = "DelCount") %>%
### long -> wide
pivot_wider(.,names_from = "VariantsGroup",values_from = "DelCount") %>%
mutate(Ratio_002_nonsynonymous = `002_nonsynonymous`/`001_synonymous`,
# Ratio_003_nonsynGERPandDerivedSIFT = `003_nonsynGERPandDerivedSIFT`/`001_synonymous`,
Ratio_004_nonsynDerivedSIFT = `004_nonsynDerivedSIFT`/`001_synonymous`,
# Ratio_005_GERP = `005_GERP`/`001_synonymous`,
# Ratio_006_SIFT_StopGain = `006_SIFT_StopGain`/`001_synonymous`,
Ratio_007_VEP = `007_VEP`/`001_synonymous`,
Ratio_008_snpEff = `008_snpEff`/`001_synonymous`,
# Ratio_009_VEP_stopGained = `009_VEP_stopGained`/`001_synonymous`,
Ratio_010_GERP16way_1 = `010_GERP16way_1`/`001_synonymous`,
# Ratio_011_GERP16way_1.2_max = `011_GERP16way_1.2_max`/`001_synonymous`,
# Ratio_012_GERP16way_1.2_2.5 = `012_GERP16way_1.2_2.5`/`001_synonymous`,
# Ratio_013_GERP16way_2.5_max=
# `013_GERP16way_2.5_max`/`001_synonymous`,
Ratio_014_GERP16way_2.14_max = `014_GERP16way_2.14_max`/`001_synonymous`,
Ratio_015_LIST_S2=`015_LIST_S2`/`001_synonymous`,
# Ratio_016_PhyloP1.5_RefNotMask=`016_PhyloP1.5_RefNotMask`/`001_synonymous`,
Ratio_017_PhyloP1.5_RefMask=`017_PhyloP1.5_RefMask`/`001_synonymous`,
# Ratio_018_GERP16way_2.14andSIFT=`018_GERP16way_2.14andSIFT`/`001_synonymous`,
# Ratio_019_Alt_PolyPhen2 = `019_Alt_PolyPhen2`/`001_synonymous`,
# Ratio_020_Derived_PolyPhen2=`020_Derived_PolyPhen2`/`001_synonymous`,
# Ratio_021_GERP16way_2.14andPolyPhen2=`021_GERP16way_2.14andPolyPhen2`/`001_synonymous`,
# Ratio_022_GERP16way_1.78=`022_GERP16way_1.78`/`001_synonymous`,
# Ratio_023_GERP16way_1.78andPolyPhen2=`023_GERP16way_1.78andPolyPhen2`/`001_synonymous`,
# Ratio_024_GERP16way_1andPolyPhen2 = `024_GERP16way_1andPolyPhen2`/`001_synonymous`,
Ratio_025_GERP16way_3.1=`025_GERP16way_3.1`/`001_synonymous`,
# Ratio_026_Derived_PolyPhen2_probably = `026_Derived_PolyPhen2_probably`/`001_synonymous`,
# Ratio_027_GERP16way_1andDerived_PolyPhen2_probably=`027_GERP16way_1andDerived_PolyPhen2_probably`/`001_synonymous`,
# Ratio_028_GERP16way_1.78andDerived_PolyPhen2_probably=`028_GERP16way_1.78andDerived_PolyPhen2_probably`/`001_synonymous`,
# Ratio_029_GERP16way_2.14andDerived_PolyPhen2_probably=`029_GERP16way_2.14andDerived_PolyPhen2_probably`/`001_synonymous`,
# Ratio_030_GERP16way_3.1andDerived_PolyPhen2_probably=`030_GERP16way_3.1andDerived_PolyPhen2_probably`/`001_synonymous`,
Ratio_031_GERP16way_1.5andDerived_PolyPhen2_0.5=`031_GERP16way_1.5andDerived_PolyPhen2_0.5`/`001_synonymous`,
Ratio_032_GERP16way_1.5=`032_GERP16way_1.5`/`001_synonymous`,
Ratio_033_Derived_PolyPhen2_0.5=`033_Derived_PolyPhen2_0.5`/`001_synonymous`) %>%
### wide -> long
pivot_longer(.,cols =c("Ratio_002_nonsynonymous":"Ratio_033_Derived_PolyPhen2_0.5"),names_to ="Stand_LoadGroup" ,values_to = "Stand_delCount") %>%
### 去除不必要的列,方便画图和管理数据
select(- ("001_synonymous":"033_Derived_PolyPhen2_0.5")) %>%
write_tsv(file.path("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/005_delCount/005_merge",str_replace_all(basename(filenamefun),"delCount","delRatio")))
return(dfdelRatio)
}
### 函数调用
inputDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/005_delCount/005_merge")
# df <- dir(inputDir,pattern = "delCount",full.names = T) %>%
# map(fun_getDelRatio)“031_GERP16way_1.5andDerived_PolyPhen2_0.5” “032_GERP16way_1.5”
[33] “033_Derived_PolyPhen2_0.5”
>
### 先处理del文件
dfdel <- read_tsv("data/004_delCount/delRatio.txt.gz") ## Rows: 371456 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Taxa, Subgenome, IfSelected, Group_refobj, DelCountGroup, Stand_Loa...
## dbl (1): Stand_delCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("Ratio_002_nonsynonymous","Ratio_031_GERP16way_1.5andDerived_PolyPhen2_0.5","Ratio_007_VEP")
labelsB <- c("Nonsynonymous","Deleterious","High impact")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>% left_join(.,dftaxaDB,by="Taxa") %>%
# filter(DelCountGroup == "TotalDerivedSNPCount") %>%
select(-c("IfSelected","Group_refobj")) %>%
filter(GroupbyContinent %in% factorA) %>%
filter(Stand_LoadGroup %in% factorB) %>%
filter(DelCountGroup %in% factorC) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(Stand_LoadGroup=factor(Stand_LoadGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC))
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray80")
p <- df %>%
filter(DelCountGroup=="Additive") %>%
filter(Stand_LoadGroup=="Deleterious") %>%
ggplot(aes(x=Subgenome,y=Stand_delCount,fill=GroupbyContinent))+
# ylab("Mutation burden")+xlab("")+
ylab("Mutation load")+xlab("")+
# ylab("Derived del SNPs/syn SNPs per accession")+xlab("")+
### volin
# geom_violin(position = position_dodge(0.8),alpha=0.8,width=2) +
# geom_boxplot(position = position_dodge(0.8), width=0.01, color="white", alpha=0.2,outlier.color = NA) +
# # geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot
# geom_boxplot(position = position_dodge(0.8),outlier.color = NA, alpha = 0.8) +
# geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot2
geom_boxplot(position = position_dodge(0.8),outlier.color = 'white',alpha=0.5,outlier.size = 0.5)+
geom_point(position = position_jitterdodge(0.2),alpha = 0.3,aes(color=GroupbyContinent),size=0.03)+
stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### pointrange
# stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,position = position_dodge(0.8),geom="pointrange",size=0.3)+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
theme_classic()+
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=0.3, colour = "black"),
legend.position = 'none',
text = element_text(size = 12))+
theme(strip.background = element_blank())
pggsave(p,filename = "~/Documents/test.pdf",height = 3,width = 5.3)choiceA <- "Ratio_031_GERP16way_1.5andDerived_PolyPhen2_0.5"
### 先处理del文件
dfdel <- read_tsv("data/004_delCount/delRatio.txt.gz") ## Rows: 371456 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Taxa, Subgenome, IfSelected, Group_refobj, DelCountGroup, Stand_Loa...
## dbl (1): Stand_delCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("Ratio_002_nonsynonymous","Ratio_031_GERP16way_1.5andDerived_PolyPhen2_0.5","Ratio_007_VEP")
labelsB <- c("Nonsynonymous","Deleterious","High impact")
df <- dfdel %>% left_join(.,dftaxaDB,by="Taxa") %>%
filter(DelCountGroup == "TotalDerivedSNPCount") %>%
select(-c("IfSelected","Group_refobj","DelCountGroup")) %>%
filter(GroupbyContinent %in% factorA) %>%
filter(Stand_LoadGroup %in% factorB) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(Stand_LoadGroup=factor(Stand_LoadGroup,levels = factorB, labels = labelsB))
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray80")p <- df %>%
ggplot(aes(x=Subgenome,y=Stand_delCount,fill=GroupbyContinent))+
ylab("Mutation burden")+xlab("")+
### volin
geom_violin(position = position_dodge(0.8),alpha=0.8,width=2) +
geom_boxplot(position = position_dodge(0.8), width=0.01, color="white", alpha=0.2,outlier.color = NA) +
# geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot
# geom_boxplot(position = position_dodge(0.8),outlier.color = NA, alpha = 0.8) +
# geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### pointrange
# stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,position = position_dodge(0.8),geom="pointrange",size=0.3)+
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
facet_grid(Stand_LoadGroup~.,scales = "free")+
# facet_wrap(Stand_LoadGroup~.,scales = "free", ncol = 1)+
# ggtitle("Del total count under all genes in vmap 2.1-2021") +
theme(strip.background = element_blank())+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=0.4)
,legend.position = 'none'
,text = element_text(size = 9))
p## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
# ggsave(p,filename = "~/Documents/test.pdf",height = 3,width = 4)
ggsave(p,filename = "figs/Fig3/misc/whole_load.pdf",height = 3,width = 4)## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
### 先处理del文件
dfdel <- read_tsv("data/004_delCount/delCount.txt.gz") ## Rows: 95766 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("002_nonsynonymous","031_GERP16way_1.5andDerived_PolyPhen2_0.5","007_VEP")
labelsB <- c("Nonsynonymous","Deleterious","High impact")
df <- dfdel %>% left_join(.,dftaxaDB,by="Taxa") %>%
select(-c("IfSelected","Group_refobj","SiteCountWithMinDepth")) %>%
filter(GroupbyContinent %in% factorA) %>%
filter(VariantsGroup %in% factorB) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(VariantsGroup=factor(VariantsGroup,levels = factorB, labels = labelsB))
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray70")p <- df %>%
ggplot(aes(x=Subgenome,y=TotalDerivedSNPCount,fill=GroupbyContinent))+
labs(y="No. derived SNPs",x="")+
### volin
# geom_violin(position = position_dodge(0.8),alpha=0.8,width=2) +
# geom_boxplot(position = position_dodge(0.8), width=0.01, color="white", alpha=0.2,outlier.color = NA) +
# # geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot
# geom_boxplot(position = position_dodge(0.8),outlier.color = NA, alpha = 0.8) +
# geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### pointrange
stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,
fun.args = list(mult = 1),
position = position_dodge(0.8),geom="pointrange")+
stat_summary(fun=mean, geom="point", color="white",position = position_dodge(0.8))+
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
facet_grid(VariantsGroup~.,scales = "free")+
# facet_wrap(Stand_LoadGroup~.,scales = "free", ncol = 1)+
# ggtitle("Del total count under all genes in vmap 2.1-2021") +
theme(strip.background = element_blank())+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=0.4)
,legend.position = 'none'
,text = element_text(size = 9))
pggsave(p,filename = "~/Documents/test.pdf",height = 3,width = 4)
# ggsave(p,filename = "figs/Fig3/misc/whole_load_count.pdf",height = 3,width = 4)FigS_delCount FigS_delRatio FigS_DelVsNonsynProportion
FigS_delCount_triads FigS_delRatio_triads FigS_triads_DelVsNonsynProportion
FigS_delCount_hexaploid FigS_delRatio_hexaploid FigS_hexaploid_DelVsNonsynProportion
FigS_delCount_hexaploid_triads FigS_delRatio_hexaploid_triads FigS_hexaploid_triads_DelVsNonsynProportion
plot_getDelCount <- function(filenamefun){
dfdel <- read_tsv(filenamefun)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("001_synonymous","002_nonsynonymous","031_GERP16way_1.5andDerived_PolyPhen2_0.5","007_VEP")
labelsB <- c("Synonymous","Nonsynonymous","Deleterious","High impact")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>%
select(-c("IfSelected","Group_refobj","SiteCountWithMinDepth","HeterDerivedSNPCount")) %>%
pivot_longer(cols = c(TotalDerivedSNPCount,HomoDerivedSNPCount,),names_to = "DelCountGroup",values_to = "Count") %>%
left_join(.,dftaxaDB,by="Taxa") %>%
filter(GroupbyContinent %in% factorA) %>%
filter(VariantsGroup %in% factorB) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(VariantsGroup=factor(VariantsGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC))
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray70")
p <- df %>%
# filter(DelCountGroup=="Additive") %>%
ggplot(aes(x=Subgenome,y=Count,fill=GroupbyContinent))+
labs(y="No. derived SNPs",x="")+
### pointrange
stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,
fun.args = list(mult = 1),
position = position_dodge(0.8),geom="pointrange")+
stat_summary(fun=mean, geom="point", color="white",position = position_dodge(0.8))+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
facet_grid(VariantsGroup~DelCountGroup,scales = "free")+
# facet_wrap(Stand_LoadGroup~.,scales = "free", ncol = 1)+
theme(strip.background = element_blank())+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=0.4),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
text = element_text(size = 9))+
guides(color=guide_legend(ncol=7))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents/")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],".pdf")),height = 4.45,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_getDelCount("data/004_delCount/delCount.txt.gz")## Rows: 95766 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pp <- plot_getDelCount("data/004_delCount/delCount_triads.txt.gz")## Rows: 37726 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pplot_getDelCount <- function(filenamefun){
dfdel <- read_tsv(filenamefun)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("001_synonymous","002_nonsynonymous","031_GERP16way_1.5andDerived_PolyPhen2_0.5","007_VEP")
labelsB <- c("Synonymous","Nonsynonymous","Deleterious","High impact")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>%
select(-c("IfSelected","Group_refobj","SiteCountWithMinDepth","HeterDerivedSNPCount")) %>%
pivot_longer(cols = c(TotalDerivedSNPCount,HomoDerivedSNPCount,),names_to = "DelCountGroup",values_to = "Count") %>%
left_join(.,dftaxaDB,by="Taxa") %>%
filter(GroupbyContinent %in% factorA) %>%
filter(VariantsGroup %in% factorB) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(VariantsGroup=factor(VariantsGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC))
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray70")
p <- df %>%
# filter(DelCountGroup=="Additive") %>%
ggplot(aes(x=Subgenome,y=Count,fill=GroupbyContinent))+
labs(y="No. derived SNPs",x="")+
### pointrange
stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,
fun.args = list(mult = 1),
position = position_dodge(0.8),geom="pointrange")+
stat_summary(fun=mean, geom="point", color="white",position = position_dodge(0.8))+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
facet_grid(VariantsGroup~DelCountGroup,scales = "free")+
# facet_wrap(Stand_LoadGroup~.,scales = "free", ncol = 1)+
expand_limits(y = 0)+
theme(strip.background = element_blank())+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=0.4),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
text = element_text(size = 12))+
guides(color=guide_legend(ncol=7))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents/")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],".pdf")),height = 4.45,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_getDelCount("data/004_delCount/delCount.txt.gz")## Rows: 95766 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pp <- plot_getDelCount("data/004_delCount/delCount_triads.txt.gz")## Rows: 37726 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pplot_getDelRatio <- function(filenamefun){
### 先处理del文件
dfdel <- read_tsv(filenamefun)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("Ratio_002_nonsynonymous","Ratio_031_GERP16way_1.5andDerived_PolyPhen2_0.5","Ratio_007_VEP")
labelsB <- c("Nonsynonymous","Deleterious","High impact")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>% left_join(.,dftaxaDB,by="Taxa") %>%
# filter(DelCountGroup == "TotalDerivedSNPCount") %>%
select(-c("IfSelected","Group_refobj")) %>%
filter(GroupbyContinent %in% factorA) %>%
filter(Stand_LoadGroup %in% factorB) %>%
filter(DelCountGroup %in% factorC) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(Stand_LoadGroup=factor(Stand_LoadGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC))
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray80")
p <- df %>%
# filter(DelCountGroup=="Additive") %>%
# filter(Stand_LoadGroup=="Deleterious") %>%
ggplot(aes(x=Subgenome,y=Stand_delCount,fill=GroupbyContinent))+
# ylab("Mutation burden")+xlab("")+
ylab("Mutation load")+xlab("")+
# ylab("Derived del SNPs/syn SNPs per accession")+xlab("")+
### volin
# geom_violin(position = position_dodge(0.8),alpha=0.8,width=2) +
# geom_boxplot(position = position_dodge(0.8), width=0.01, color="white", alpha=0.2,outlier.color = NA) +
# # geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot
# geom_boxplot(position = position_dodge(0.8),outlier.color = NA, alpha = 0.8) +
# geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot2
geom_boxplot(position = position_dodge(0.8),outlier.color = 'white',alpha=0.5,outlier.size = 0.5)+
geom_point(position = position_jitterdodge(0.2),alpha = 0.3,aes(color=GroupbyContinent),size=0.03)+
stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### pointrange
# stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,position = position_dodge(0.8),geom="pointrange",size=0.3)+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
# facet_grid(Stand_LoadGroup~DelCountGroup,scales = "free")+
facet_wrap(Stand_LoadGroup~DelCountGroup,scales = "free", ncol = 2,
strip.position = c("right"))+
# ggtitle("Del total count under all genes in vmap 2.1-2021") +
theme_classic()+
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=0.3, colour = "black"),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
strip.background = element_rect(color = "white",fill = "gray90"),
text = element_text(size = 12))+
guides(color=guide_legend(ncol=7))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],".pdf")),height = 7.2,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_getDelRatio("data/004_delCount/delRatio.txt.gz")## Rows: 371456 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Taxa, Subgenome, IfSelected, Group_refobj, DelCountGroup, Stand_Loa...
## dbl (1): Stand_delCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
p# p <- plot_getDelRatio("data/004_delCount/delRatio_triads.txt.gz")
# pplot_getDelRatio <- function(filenamefun){
### 先处理del文件
dfdel <- read_tsv(filenamefun)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("Ratio_002_nonsynonymous","Ratio_031_GERP16way_1.5andDerived_PolyPhen2_0.5","Ratio_007_VEP")
labelsB <- c("Nonsynonymous","Deleterious","High impact")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>% left_join(.,dftaxaDB,by="Taxa") %>%
# filter(DelCountGroup == "TotalDerivedSNPCount") %>%
select(-c("IfSelected","Group_refobj")) %>%
filter(GroupbyContinent %in% factorA) %>%
filter(Stand_LoadGroup %in% factorB) %>%
filter(DelCountGroup %in% factorC) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(Stand_LoadGroup=factor(Stand_LoadGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC))
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray80")
p <- df %>%
# filter(DelCountGroup=="Additive") %>%
# filter(Stand_LoadGroup=="Deleterious") %>%
ggplot(aes(x=Subgenome,y=Stand_delCount,fill=GroupbyContinent))+
# ylab("Mutation burden")+xlab("")+
ylab("Mutation load")+xlab("")+
# ylab("Derived del SNPs/syn SNPs per accession")+xlab("")+
### volin
# geom_violin(position = position_dodge(0.8),alpha=0.8,width=2) +
# geom_boxplot(position = position_dodge(0.8), width=0.01, color="white", alpha=0.2,outlier.color = NA) +
# # geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot
# geom_boxplot(position = position_dodge(0.8),outlier.color = NA, alpha = 0.8) +
# geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot2
geom_boxplot(position = position_dodge(0.8),outlier.color = 'white',alpha=0.5,outlier.size = 0.5)+
geom_point(position = position_jitterdodge(0.2),alpha = 0.3,aes(color=GroupbyContinent),size=0.03)+
stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### pointrange
# stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,position = position_dodge(0.8),geom="pointrange",size=0.3)+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
# facet_grid(Stand_LoadGroup~DelCountGroup,scales = "free")+
facet_wrap(Stand_LoadGroup~DelCountGroup,scales = "free", ncol = 2,
strip.position = c("right"))+
# ggtitle("Del total count under all genes in vmap 2.1-2021") +
theme_classic()+
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=0.3, colour = "black"),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
strip.background = element_rect(color = "white",fill = "gray90"),
text = element_text(size = 12))+
guides(color=guide_legend(ncol=7))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],".pdf")),height = 7.2,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_getDelRatio("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/005_delCount/011_merge_byABsubDsub/delRatio.txt.gz")## Rows: 90048 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Taxa, Subgenome, IfSelected, Group_refobj, DelCountGroup, Stand_Loa...
## dbl (1): Stand_delCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
p# p <- plot_getDelRatio("data/004_delCount/delRatio_triads.txt.gz")
# pplot_delVsNonsyn <- function(filenamefun){
### 先处理del文件
# dfdel <- read_tsv("data/004_delCount/delCount.txt.gz")
dfdel <- read_tsv(filenamefun)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorB <- c("002_nonsynonymous","031_GERP16way_1.5andDerived_PolyPhen2_0.5")
labelsB <- c("Nonsynonymous","Deleterious")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>%
select(-c("IfSelected","Group_refobj","SiteCountWithMinDepth","HeterDerivedSNPCount")) %>%
pivot_longer(cols = c(TotalDerivedSNPCount,HomoDerivedSNPCount,),names_to = "DelCountGroup",values_to = "Count") %>%
left_join(.,dftaxaDB,by="Taxa") %>%
filter(GroupbyContinent %in% factorA) %>%
filter(VariantsGroup %in% factorB) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(VariantsGroup=factor(VariantsGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC)) %>%
pivot_wider(names_from =VariantsGroup,values_from = Count) %>%
mutate(Proportion_delVsNonsyn = Deleterious/Nonsynonymous)
colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray70")
p <- df %>%
ggplot(aes(x=Subgenome,y=Proportion_delVsNonsyn,fill=GroupbyContinent))+
ylab("Proportion of derived\ndel SNPs/nonsyn SNPs ")+xlab("")+
### boxplot2
geom_boxplot(position = position_dodge(0.8),outlier.color = 'white',alpha=0.5,outlier.size = 0.5)+
geom_point(position = position_jitterdodge(0.2),alpha = 0.3,aes(color=GroupbyContinent),size=0.03)+
stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### pointrange
# stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,position = position_dodge(0.8),geom="pointrange",size=0.3)+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
# facet_grid(Stand_LoadGroup~DelCountGroup,scales = "free")+
facet_wrap(~DelCountGroup,scales = "free", ncol = 2,
strip.position = c("top"))+
# ggtitle("Del total count under all genes in vmap 2.1-2021") +
theme_classic()+
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=0.3, colour = "black"),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
strip.background = element_rect(color = "white",fill = "gray90"),
text = element_text(size = 12))+
guides(color=guide_legend(ncol=7))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents/")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],"_DelVsNonsynProportion.pdf")),height = 3,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_delVsNonsyn("data/004_delCount/delCount.txt.gz")## Rows: 95766 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pp <- plot_delVsNonsyn("data/004_delCount/delCount_triads.txt.gz")## Rows: 37726 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pplot_getDelCount <- function(filenamefun){
dfdel <- read_tsv(filenamefun)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
# factorA <- c("AT","WE","DE","FTT","LR_EU","CL","LR_EA")
factorA <- c("LR_EU","CL","LR_EA")
factorB <- c("001_synonymous","002_nonsynonymous","031_GERP16way_1.5andDerived_PolyPhen2_0.5","007_VEP")
labelsB <- c("Synonymous","Nonsynonymous","Deleterious","High impact")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>%
select(-c("IfSelected","Group_refobj","SiteCountWithMinDepth","HeterDerivedSNPCount")) %>%
pivot_longer(cols = c(TotalDerivedSNPCount,HomoDerivedSNPCount,),names_to = "DelCountGroup",values_to = "Count") %>%
left_join(.,dftaxaDB,by="Taxa") %>%
filter(GroupbyContinent %in% factorA) %>%
filter(VariantsGroup %in% factorB) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(VariantsGroup=factor(VariantsGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC))
# colB <- c("#87cef9","#ffd702","#7f5701","#016699", "#fc6e6e","#9900ff","gray70")
colB <- c("#fc6e6e","#9900ff","gray70")
p <- df %>%
# filter(DelCountGroup=="Additive") %>%
ggplot(aes(x=Subgenome,y=Count,fill=GroupbyContinent))+
labs(y="No. derived SNPs",x="")+
### pointrange
stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,
fun.args = list(mult = 1),
position = position_dodge(0.8),geom="pointrange")+
stat_summary(fun=mean, geom="point", color="white",position = position_dodge(0.8))+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
facet_grid(VariantsGroup~DelCountGroup,scales = "free")+
# facet_wrap(Stand_LoadGroup~.,scales = "free", ncol = 1)+
expand_limits(y = 0)+ ### y 轴是否从0 开始
theme(strip.background = element_blank())+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=0.4),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
text = element_text(size = 9))+
guides(color=guide_legend(ncol=3))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents/")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],".pdf")),height = 4.45,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_getDelCount("data/004_delCount/delCount_hexaploid.txt.gz")## Rows: 37726 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pp <- plot_getDelCount("data/004_delCount/delCount_hexaploid_triads.txt.gz")## Rows: 37726 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Taxa, Subgenome, VariantsGroup, IfSelected, Group_refobj
## dbl (4): TotalDerivedSNPCount, HomoDerivedSNPCount, HeterDerivedSNPCount, Si...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pplot_getDelRatio <- function(filenamefun){
### 先处理del文件
dfdel <- read_tsv(filenamefun)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
factorA <- c("LR_EU","CL","LR_EA")
factorB <- c("Ratio_002_nonsynonymous","Ratio_031_GERP16way_1.5andDerived_PolyPhen2_0.5","Ratio_007_VEP")
labelsB <- c("Nonsynonymous","Deleterious","High impact")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>% left_join(.,dftaxaDB,by="Taxa") %>%
# filter(DelCountGroup == "TotalDerivedSNPCount") %>%
select(-c("IfSelected","Group_refobj")) %>%
filter(GroupbyContinent %in% factorA) %>%
filter(Stand_LoadGroup %in% factorB) %>%
filter(DelCountGroup %in% factorC) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(Stand_LoadGroup=factor(Stand_LoadGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC))
colB <- c("#fc6e6e","#9900ff","gray80")
p <- df %>%
# filter(DelCountGroup=="Additive") %>%
# filter(Stand_LoadGroup=="Deleterious") %>%
ggplot(aes(x=Subgenome,y=Stand_delCount,fill=GroupbyContinent))+
# ylab("Mutation burden")+xlab("")+
ylab("Mutation load")+xlab("")+
# ylab("Derived del SNPs/syn SNPs per accession")+xlab("")+
### volin
# geom_violin(position = position_dodge(0.8),alpha=0.8,width=2) +
# geom_boxplot(position = position_dodge(0.8), width=0.01, color="white", alpha=0.2,outlier.color = NA) +
# # geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot
# geom_boxplot(position = position_dodge(0.8),outlier.color = NA, alpha = 0.8) +
# geom_point(position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.8),aes(color=GroupbyContinent),alpha=0.3)+
# stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
### boxplot2
geom_boxplot(position = position_dodge(0.8),outlier.color = 'white',alpha=0.5,outlier.size = 0.5)+
geom_point(position = position_jitterdodge(0.2),alpha = 0.3,aes(color=GroupbyContinent),size=0.03)+
stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### pointrange
# stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,position = position_dodge(0.8),geom="pointrange",size=0.3)+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
# facet_grid(Stand_LoadGroup~DelCountGroup,scales = "free")+
facet_wrap(Stand_LoadGroup~DelCountGroup,scales = "free", ncol = 2,
strip.position = c("right"))+
# ggtitle("Del total count under all genes in vmap 2.1-2021") +
theme_classic()+
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=0.3, colour = "black"),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
strip.background = element_rect(color = "white",fill = "gray90"),
text = element_text(size = 12))+
guides(color=guide_legend(ncol=3))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents/")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],".pdf")),height = 7.2,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_getDelRatio("data/004_delCount/delRatio_hexaploid.txt.gz")## Rows: 139296 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Taxa, Subgenome, IfSelected, Group_refobj, DelCountGroup, Stand_Loa...
## dbl (1): Stand_delCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pp <- plot_getDelRatio("data/004_delCount/delRatio_hexaploid_triads.txt.gz")## Rows: 139296 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Taxa, Subgenome, IfSelected, Group_refobj, DelCountGroup, Stand_Loa...
## dbl (1): Stand_delCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pplot_delVsNonsyn <- function(filenamefun){
### 先处理del文件
# dfdel <- read_tsv("data/004_delCount/delCount.txt.gz")
dfdel <- read_tsv(filenamefun,show_col_types = FALSE)
dftaxaDB <- read_tsv("data/001_TaxaDB/WheatVMap2_GermplasmInfo.txt") %>%
select(Taxa=VcfID,GenomeType,GroupbyContinent)
factorA <- c("LR_EU","CL","LR_EA")
factorB <- c("002_nonsynonymous","031_GERP16way_1.5andDerived_PolyPhen2_0.5")
labelsB <- c("Nonsynonymous","Deleterious")
factorC <- c("TotalDerivedSNPCount","HomoDerivedSNPCount")
labelsC <- c("Additive","Recessive")
df <- dfdel %>%
select(-c("IfSelected","Group_refobj","SiteCountWithMinDepth","HeterDerivedSNPCount")) %>%
pivot_longer(cols = c(TotalDerivedSNPCount,HomoDerivedSNPCount,),names_to = "DelCountGroup",values_to = "Count") %>%
left_join(.,dftaxaDB,by="Taxa") %>%
filter(GroupbyContinent %in% factorA) %>%
filter(VariantsGroup %in% factorB) %>%
droplevels() %>%
mutate(GroupbyContinent= factor(GroupbyContinent, levels = factorA)) %>%
mutate(VariantsGroup=factor(VariantsGroup,levels = factorB, labels = labelsB)) %>%
mutate(DelCountGroup=factor(DelCountGroup,levels = factorC,labels = labelsC)) %>%
pivot_wider(names_from =VariantsGroup,values_from = Count) %>%
mutate(Proportion_delVsNonsyn = Deleterious/Nonsynonymous)
colB <- c("#fc6e6e","#9900ff","gray70")
p <- df %>%
ggplot(aes(x=Subgenome,y=Proportion_delVsNonsyn,fill=GroupbyContinent))+
ylab("Proportion of derived\ndel SNPs/nonsyn SNPs ")+xlab("")+
### boxplot2
geom_boxplot(position = position_dodge(0.8),outlier.color = 'white',alpha=0.5,outlier.size = 0.5)+
geom_point(position = position_jitterdodge(0.2),alpha = 0.3,aes(color=GroupbyContinent),size=0.03)+
stat_summary(fun=mean, geom="point", color="red",position = position_dodge(0.8),size=0.5)+
geom_vline(xintercept = c(1.5,2.5),color = "gray", linetype="dashed")+
### pointrange
# stat_summary(aes(color=GroupbyContinent),fun.data=mean_sdl,position = position_dodge(0.8),geom="pointrange",size=0.3)+
### other para
scale_color_manual(values = colB,name='')+
scale_fill_manual(values = colB,name='')+
# facet_grid(Stand_LoadGroup~DelCountGroup,scales = "free")+
facet_wrap(~DelCountGroup,scales = "free", ncol = 2,
strip.position = c("top"))+
# ggtitle("Del total count under all genes in vmap 2.1-2021") +
theme_classic()+
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size=0.3, colour = "black"),
# legend.position = 'none',
### 添加 legend
legend.key = element_blank(),
legend.position = "bottom",
strip.background = element_rect(color = "white",fill = "gray90"),
text = element_text(size = 12))+
guides(color=guide_legend(ncol=3))
p
outDir <- c("/Users/Aoyue/project/wheatVMap2_1000/002_dataAnalysis/RProject/R4VMap2FigsS1062/figs/FigS_delCount/misc")
outDir <- c("~/Documents/")
ggsave(p,filename = file.path(outDir,str_c(str_split_fixed(basename(filenamefun),".txt",2)[,1],"_DelVsNonsynProportion.pdf")),height = 3,width = 7.2)
library("ggpubr")
ggp_legend <- get_legend(p)
ggsave(as_ggplot(ggp_legend),filename = "~/Documents/legend.pdf",height = 1,width = 5)
return(p)
}
p <- plot_delVsNonsyn("data/004_delCount/delCount_hexaploid.txt.gz")## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pp <- plot_delVsNonsyn("data/004_delCount/delCount_hexaploid_triads.txt.gz")## Rows: 1062 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (33): VcfID, Group, Taxa, Accessions_byLuLab, GenomeType, Genus, Species...
## dbl (5): Longitude, Latitude, Altitude, ReleasedIntroducedYear, IfOnVmap2_S643
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
p